########################################################
## This code creates predictions using models trained on other unemployment durations.
########################################################

########################################################
## Preparation of the workspace
########################################################

## Start from an empty workspace: drop every object currently defined
rm(list = ls())

## Attach the required packages (order kept as is, since later packages can
## mask functions from earlier ones, e.g. dplyr after plyr)
library(haven)
library(caret)
library(randomForest)
library(doParallel)
library(mice)
library(plyr)
library(dplyr)
library(VIM)
library(base)
library(ranger)
library(glmnet)
library(xgboost)

## Record when the run started -> allows checking how long the code takes
start_time <- Sys.time()

## Folder layout: project root, data folder and folder holding the R programs
main <- "placeholder_main"

dataDirectory <- paste0(main, "/Data")
RDirectory <- paste0(main, "/Programs/Output Generation")

## Read in the helper files with the tuning and training functions.
## source() evaluates in the global environment by default, so the helpers
## end up in the workspace exactly as with two direct source() calls.
invisible(lapply(
  c(
    "/102_0_caret_parameter_tuning_Function.R",
    "/103_0_caret_predictions_Function.R"
  ),
  function(helper_file) source(paste0(RDirectory, helper_file))
))

########################################################
## Define the locals for the loop
########################################################
 
# Model specification(s) to loop over; the name is part of the data file name
# models <- c("Full")
model_list <- c("Full")

# Outcome variables for which predictions are created
dependent_variable <- c(
  "emplAft6M_0M_In",
  "emplAft6M_6M_In",
  "emplAft6M_12M_In"
)

# Years to loop over (first and last year inclusive)
years <- seq(1992, 2016)


########################################################
## Create predictions for each outcome (people unemployed for X months)
## using the models trained on the other unemployment durations (Y != X)
########################################################

 for (model in model_list) {

  for (y in years) {

    for (dependent in dependent_variable) {

      ## Progress message: outcome, year and wall-clock time at the start
      print(paste(dependent, "Start", y, format(Sys.time())))

      ########################################################
      ## Load the dataset
      ########################################################
      data_file <- paste0(dataDirectory, "/002_DataForR_", model, "_", y, ".dta")
      data <- read_dta(data_file)

      ## Stop early with a readable message if a column used below is absent
      ## (otherwise the failure is cryptic or the hold-out sample is silently empty)
      required_columns <- c("Gender", "n_order", dependent)
      missing_columns <- setdiff(required_columns, colnames(data))
      if (length(missing_columns) > 0) {
        stop(
          "Missing column(s) in ", data_file, ": ",
          paste(missing_columns, collapse = ", "),
          call. = FALSE
        )
      }

      ########################################################
      ## Define the sub-sample for creation of predictions
      ########################################################

      ## Sizes of the parameter-tuning part (10% of rows, plus 1) and of the
      ## training part (30% of rows). Both are computed on the full file,
      ## i.e. before rows with a missing outcome are dropped.
      ## Predictions are created only for rows whose n_order lies beyond
      ## these two parts (presumably the rows not used for tuning/training).
      n_parametertuning <- round(nrow(data) * 0.1, digits = 0) + 1
      n_training <- round(nrow(data) * 0.3, digits = 0)
      n_training_plus_tuning <- n_training + n_parametertuning
      n_training_plus_tuning_1 <- n_training_plus_tuning + 1

      ## Column number where the covariates start
      first_column <- which(colnames(data) == "Gender")

      ## Keep only observations with a non-missing dependent variable
      y_column <- which(colnames(data) == dependent)
      data <- data[complete.cases(data[, y_column]), ]

      ## Hold-out sample used for the predictions
      data_pred <- data[data$n_order >= n_training_plus_tuning_1, ]
      rm("data") ## the full dataset is not needed any more

      ## Outcome as a factor/category
      total_y <- factor(data_pred[[dependent]])

      ## The outcome is expected to be binary. This check used to happen only
      ## implicitly, as an error while relabelling a temporary copy of the data
      ## that was never used afterwards; that copy is no longer created.
      if (nlevels(total_y) > 2) {
        stop(
          "Outcome ", dependent, " has more than two levels in ", data_file,
          call. = FALSE
        )
      }

      ## All covariates: every column from "Gender" to the last column
      total_x <- data_pred[, first_column:ncol(data_pred)]

      ## Individual ID and n, to be attached to the predictions
      persinfo <- as.data.frame(cbind(data_pred$LopNr_PersonNr, data_pred$n))

      rm("data_pred")

      for (unempl in c(0, 6, 12)) {

        ## Outcome that the stored model was trained on
        model_outcome <- paste0("emplAft6M_", unempl, "M_In")

        ## Only models trained on a *different* outcome are applied here
        if (dependent == model_outcome) {
          next
        }

        ########################################################
        ## Load models
        ########################################################

        model_file <- paste0(
          dataDirectory, "/103_Models_", model, "_", model_outcome, "_", y, ".rda"
        )
        loaded_objects <- load(file = model_file)

        ## Without this check a file lacking `models` would silently reuse
        ## the object loaded in a previous iteration
        if (!"models" %in% loaded_objects) {
          stop("No object named 'models' in ", model_file, call. = FALSE)
        }

        ########################################################
        ## Create predictions
        ########################################################

        ## Random Forest: keep the second column of the predictions matrix
        ## (presumably the probability of the second outcome class - TODO confirm).
        ## NOTE: the object names prob_rf, prob_boost and prob_lasso determine
        ## column names in the output file, so they must not be renamed.
        prob_rf <- predict(models$rff_final, total_x)
        prob_rf <- prob_rf[["predictions"]]
        prob_rf <- data.frame(prob_rf[, 2])

        ## Gradient Boost and LASSO both need the covariates as a numeric
        ## matrix; convert once and share it between the two models
        total_x_matrix <- data.matrix(total_x)
        prob_boost <- predict(models$rboost_final, total_x_matrix)
        prob_lasso <- predict(models$rlasso_final, total_x_matrix, type = "response")
        rm("total_x_matrix") ## remove the matrix copy of the covariates

        ########################################################
        ## Save predictions
        ########################################################

        ## Outcome, the three predictions, personal ID and n side by side
        output <- cbind(total_y, prob_rf, prob_boost, prob_lasso, persinfo)
        write.csv(
          output,
          file = paste0(
            dataDirectory, "/103_predictionsR_", model, "_", dependent, "_", y,
            "_", model_outcome, "_Model.csv"
          )
        )
        rm("output") ## remove the dataset with output

      }

      ## Progress message: printed once per outcome and year, after all
      ## models have been applied
      print(paste(dependent, "End", y, format(Sys.time())))

    }

  }
}
